Linear Regression with one variable. Course: Machine Learning Prof.: Andrew Ng


In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
data = np.loadtxt('/home/alien/Desktop/ML/ex1data1.txt', delimiter=',')

In [3]:
y = data[:,1]
m = len(y)
x = data[:,0]
X = np.ones(shape = (m,2))
X[:,1] = x
theta = np.zeros(shape = (2,1))

In [4]:
def computeCost(x,y,theta):
    m = len(y)
    h = x.dot(theta)
    square_errors = (h - y)**2
    J = (1.0/(2*m)) * square_errors.sum()
    return J

In [5]:
def gradientDescent(X, y, theta, alpha, iterations):
    m = len(y)
    J_history = np.zeros(shape = (iterations,1))
    for i in range(iterations):
        
        predictions = X.dot(theta).flatten()

        errors_x1 = (predictions - y) * X[:, 0]
        errors_x2 = (predictions - y) * X[:, 1]

        theta[0][0] = theta[0][0] - alpha * (1.0 / m) * errors_x1.sum()
        theta[1][0] = theta[1][0] - alpha * (1.0 / m) * errors_x2.sum()

        J_history[i, 0] = computeCost(X, y, theta)

    return theta, J_history

In [6]:
print computeCost(X, y, theta)
theta, J_history = gradientDescent(X,y,theta,0.01,1500)


3111.05518611

In [7]:
print 'For population = 35,000, we predict a profit of %f' % (np.array([1, 3.5]).dot(theta) * 10000)


For population = 35,000, we predict a profit of 4519.767868

In [8]:
print 'For population = 70,000, we predict a profit of %f' % (np.array([1, 7]).dot(theta) * 10000)


For population = 70,000, we predict a profit of 45342.450129

In [10]:
plt.scatter(data[:, 0], data[:, 1], marker='o', c='b')
plt.title('Profits distribution')
plt.xlabel('Population of City in 10,000s')
plt.ylabel('Profit in $10,000s')


Out[10]:
<matplotlib.text.Text at 0x7f08e6972690>

In [ ]:
#References: https://gist.github.com/marcelcaraciolo/1321575